Advanced Visualization Topics
18.03.2019

# Attach the esquisse package and launch its add-in via esquisser().
library(esquisse)
esquisser()
The purpose of this add-in is to let you explore your data quickly to extract the information they hold.
![]()
library(leaflet)

# Map with a single marker on top of the default OpenStreetMap tiles;
# the marker carries a text popup.
m <- leaflet() %>%
  addTiles() %>% # Add default OpenStreetMap map tiles
  addMarkers(lng = 174.768, lat = -36.852, popup = "The birthplace of R")
m

# One marker per row for the first 20 rows of the quakes data set,
# using the magnitude (converted to text) as popup.
data(quakes)
m <- leaflet(data = quakes[1:20, ]) %>%
  addTiles() %>%
  addMarkers(~long, ~lat, popup = ~as.character(mag))
m
A choropleth map (from Greek χῶρος (“area/region”) + πλῆθος (“multitude”)) is a thematic map in which areas are shaded or patterned in proportion to the measurement of the statistical variable being displayed on the map, such as population density or per-capita income.
The key ingredient for a choropleth is the geometry onto which the data is to be projected.
https://gadm.org/ is an amazing source of so-called SpatialPolygonsDataFrames (sp) for different administrative boundaries.
ggplot does not understand sp objects right away; they first have to be converted into a plain data frame (e.g. with fortify()).
library(rgeos)
library(broom)
library(maptools)
library(tidyverse)
library(sp)

# Download the GADM 3.6 sp object for Germany (file gadm36_DEU_1_sp.rds).
gadm36_DEU_1_sp <- readRDS(
  gzcon(url("https://biogeo.ucdavis.edu/data/gadm3.6/Rsp/gadm36_DEU_1_sp.rds"))
)

# Convert the sp object into a data frame that ggplot can draw.
# `region` must be passed by name: the second positional argument of the
# sp fortify method is `data`, so a positional "NAME_1" would be ignored
# and `id` would fall back to the polygon index instead of the NAME_1 value.
mapdata <- fortify(gadm36_DEU_1_sp, region = "NAME_1")
glimpse(mapdata)

# Polygons filled by region id; `group` keeps the vertices of each
# polygon together.
ggplot(mapdata) +
  geom_polygon(aes(x = long, y = lat, fill = id, group = group))

# Same plot without the `group` aesthetic.
# NOTE(review): presumably kept to show what goes wrong without grouping —
# confirm this is intentional.
ggplot(mapdata) +
  geom_polygon(aes(x = long, y = lat, fill = id))
Get the car registration data on Laender level from https://www.kba.de/DE/Statistik/Fahrzeuge/Neuzulassungen/Umwelt/2016/2016_n_umwelt_dusl.html?nn=1978302
Use it to create a proper choropleth!
library(leaflet)
library(raster)

# Get GADM data
usa <- getData("GADM", country = "USA", level = 1)

# Attach a normally distributed random indicator, one value per polygon
usa$randomData <- rnorm(n = nrow(usa), mean = 150, sd = 30)

# Create a color palette to fill the polygons
pal <- colorQuantile("Greens", NULL, n = 5)

# Create a pop up (onClick)
polygon_popup <- paste0(
  "<strong>Name: </strong>", usa$NAME_1, "<br>",
  "<strong>Indicator: </strong>", round(usa$randomData, 2)
)

# Create leaflet map
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-98.35, 39.7, zoom = 4) %>%
  addPolygons(
    data = usa,
    fillColor = ~pal(randomData),
    fillOpacity = 0.4,
    weight = 2,
    color = "white",
    popup = polygon_popup
  )
Key ingredients
Typically provided as an edge list (easily created with dplyr pipelines) or as an adjacency matrix
Useful for visualizations of social or technical systems
The classic and cross-platform quasi standard: igraph
Many different packages available for the ggplot environment
We will look into ggraph / tidygraph
Getting Data
library(tidygraph)
library(tidyverse)
library(ggraph)
library(RCurl)

# Fetch the edge list CSV as one string, then parse it into a data frame.
edges_url <- "https://raw.githubusercontent.com/mathbeveridge/asoiaf/master/data/asoiaf-all-edges.csv"
x <- getURL(edges_url)
y <- read.csv(text = x)
Creating Network
# Undirected graph of the complete edge list: edge width follows `weight`,
# every node is drawn as a point and labelled with its name.
ggraph(as_tbl_graph(y, directed = FALSE)) +
  geom_edge_link(aes(width = weight), alpha = 0.1) +
  geom_node_point() +
  geom_node_text(aes(label = name))
# Total edge weight per character: stack the Source and Target columns into
# a single `name` column, then sum `weight` for each name.
main_ch <- y %>%
  select(-Type) %>%
  gather(x, name, Source:Target) %>%
  group_by(name) %>%
  summarise(sum_weight = sum(weight)) %>%
  ungroup()

# Keep the 40 characters with the largest total weight, sorted descending.
main_ch_l <- main_ch %>%
  arrange(desc(sum_weight)) %>%
  top_n(40, sum_weight)

main_ch_l
# A tibble: 40 x 2
name sum_weight
<chr> <int>
1 Tyrion-Lannister 2873
2 Jon-Snow 2757
3 Cersei-Lannister 2232
4 Joffrey-Baratheon 1762
5 Eddard-Stark 1649
6 Daenerys-Targaryen 1608
7 Jaime-Lannister 1569
8 Sansa-Stark 1547
9 Bran-Stark 1508
10 Robert-Baratheon 1488
# ... with 30 more rows
# Keep only edges whose Source and Target are both among the selected
# characters in main_ch_l.
cooc_all_f <- filter(
  y,
  Source %in% main_ch_l$name & Target %in% main_ch_l$name
)

# Build the undirected graph, compute node-level measures, attach the
# per-character weights from main_ch_l, and drop multiple edges.
cooc_all_f_graph <- cooc_all_f %>%
  as_tbl_graph(directed = FALSE) %>%
  mutate(
    neighbors = centrality_degree(),
    group = group_infomap(),
    keyplayer = node_is_keyplayer(k = 10)
  ) %>%
  left_join(main_ch_l) %>%
  activate(edges) %>%
  filter(!edge_is_multiple())

layout <- create_layout(cooc_all_f_graph, layout = "fr")

# Node colour = group, size = log of total weight, shape = keyplayer flag.
network <- ggraph(layout) +
  geom_edge_density(aes()) +
  geom_edge_link(aes(width = weight), alpha = 0.2) +
  geom_node_point(
    aes(
      color = factor(group),
      size = log(sum_weight),
      shape = keyplayer
    )
  ) +
  geom_node_text(aes(label = name), size = 4, repel = TRUE) +
  scale_color_brewer(palette = "Set1") +
  theme_graph() +
  labs(
    title = "A Song of Ice and Fire character network",
    subtitle = "Nodes are colored by group"
  )
network
The following code provides you with Hillary Clinton's infamous “email server data”
Apply your network visualization skills to this dataset
# jsonlite provides fromJSON(). library() stops with an error if the package
# is missing, whereas require() would only warn and return FALSE.
library(jsonlite)

# Download the data only when no local copy exists, and cache the `rows`
# element of the response in clinton_emails.rda for later runs.
if (!file.exists("clinton_emails.rda")) {
  clinton_emails <- fromJSON("http://graphics.wsj.com/hillary-clinton-email-documents/api/search.php?subject=&text=&to=&from=&start=&end=&sort=docDate&order=desc&docid=&limit=27159&offset=0")$rows
  save(clinton_emails, file = "clinton_emails.rda")
}

# Always read from the cached file so both code paths end in the same state.
load("clinton_emails.rda")
# Create data: two parallel vectors, cross-tabulated into a contingency table
name <- c(3, 10, 10, 3, 6, 7, 8, 3, 6, 1, 2, 2, 6, 10, 2, 3, 3, 10, 4, 5, 9, 10)
feature <- paste0(
  "feature ",
  c(1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5)
)
dat <- data.frame(name, feature)
dat <- with(dat, table(name, feature))

# Load the circlize library
library(circlize)

# Make the circular plot from the table in long (data frame) form
chordDiagram(as.data.frame(dat), transparency = 0.5)
# library() stops with an error if ggalluvial is missing, whereas require()
# would only warn and return FALSE.
library(ggalluvial)

data(majors)
majors$curriculum <- as.factor(majors$curriculum)

# Alluvial plot: one stratum per curriculum in each semester, with flows
# following the individual students between semesters.
sankey <- ggplot(
  majors,
  aes(
    x = semester, stratum = curriculum, alluvium = student,
    fill = curriculum, label = curriculum
  )
) +
  scale_fill_brewer(type = "qual", palette = "Set2") +
  geom_flow(
    stat = "alluvium", lode.guidance = "rightleft",
    color = "darkgray"
  ) +
  geom_stratum() +
  theme(legend.position = "bottom") +
  ggtitle("student curricula across several semesters")
sankey
library(gapminder)
library(ggplot2)
library(gganimate)

# Scatter plot of GDP per capita (log scale) against life expectancy, one
# panel per continent, with the point size mapped to population.
# transition_time(year) animates over `year`, and the title displays the
# current frame time.
gapanimation <- ggplot(
  gapminder,
  aes(gdpPercap, lifeExp, size = pop, colour = country)
) +
  geom_point(alpha = 0.7) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  theme(legend.position = 'none') +
  labs(
    title = 'Year: {frame_time}',
    x = 'GDP per capita',
    y = 'life expectancy'
  ) +
  transition_time(year)
gapanimation